/*
 COVID 19 Data Reliabity and Institutions
 
 Authors: 
 Fernando Barros Jr - FEARP/USP - fabarrosjr@usp.br
 Daniel Galveas - Fucape Business School - galveas.daniel@gmail.com
 Fabio Y. S. Motoki - University of East Anglia – Norwich Business School - f.motoki@uea.ac.uk
 Victor R. S. Rodrigues - Federal University of Espirito Santo - victorrangelrodrigues@gmail.com
 
*/

//Check if user-written commands installed
foreach package in winsor distinct carryforward ivreg2 ranktest {
	capture which `package'
	if _rc==111 {
		ssc install `package'
	}
	else {
		di "`package' is good to go"
	} 
}
//Packages != command name or not in SSC
/*
ssc install egenmore
ssc install rtfutil
net install estout
*/

clear all

cd "`:environment USERPROFILE'\OneDrive - University of East Anglia\Projetos\Fernando Victor Galveas - Institutions transparency\data"

capture log close
log using "COVID_Data_Reliability.smcl", replace

import excel "COVID_Data_Reliability.xlsx", firstrow sheet("painel")

label var pop "Population"
label var densid "Population density - people per sq. km of land area"
label var urban_pop "urban population - Percentage"
label var pop_65 "population older than 65 - Percentage"
label var gdp_capita "ppp gdp per capita"
label var gdp_growth "GDP Growth"
label var health_gdp "Amount of Gdp spent with Health - Percentage"
label var tourism "International tourism, receipts (% of total exports)"
label var temp "2019 Year Average Temperature"

label var rule_law "Rule of Law Index"
label var constraint_gov "Constraints on Government Powers"
label var absence_corrup "Absence of Corruption"
label var open_gov "Open Government "
label var right "Fundamental Rights"
label var order_sec "Order and Security"
label var regulatory_enforce "Regulatory Enforcement"
label var civil_justice "Civil Justice"
label var criminal_justice "Criminal Justice"

destring chi_square-english_lang, replace force

gen ln_pibc = ln(gdp_capita)
gen ln_densid = ln(densid)

replace tourism = tourism/100
replace gdp_growth = 1 + (gdp_growth/100)
replace health_gdp = health_gdp/100
replace pop_65 = pop_65/100
replace urban_pop = urban_pop/100
gen english_t = english_lang/10000000
gen european_t = european_lang/1000000



save varlist.dta, replace

clear all
import excel "COVID_Data_Reliability.xlsx", firstrow sheet("daily")

/*
Cases_on_an_international_conveyance_Japan
Wallis_and_Futuna
*/
drop if missing(code)
encode code, gen(id)

*18 days of negative number of cases
bysort id: egen has_neg_cases = max((cases < 0))
drop if cases < 0
label var has_neg_cases "Has reported negative #cases"

* excluding days before first report
gen cumulative_cases = 0
label var cumulative_cases "Total cases to date"
sort id dateRep
by id: replace cumulative_cases = cond(_n==1,cases,cases+cumulative_cases[_n-1])
drop if cumulative_cases == 0
//bysort id: egen count_cases = sum(cases)
//drop if count_cases == 0

* total sum of cases
bysort id: egen total_cases = total(cases)

* calculation frequency = total day with report related with total days after first case
gen skip_date = 0
by id: replace skip_date = cond(_n==1,0,dateRep-dateRep[_n-1]!=1)
label var skip_date "Not consecutive day"
sum skip_date
gen days_skipped = 0
by id: replace days_skipped = cond(_n==1,0,dateRep-dateRep[_n-1]-1)
label var days_skipped "#days skipped"
sum days_skipped

foreach cutoff in 1 100 {
	gen dc_`cutoff' = 0

	bysort id: egen dwc_`cutoff' = count(dateRep) if cumulative_cases >= `cutoff' & cases > 0
	replace dwc_`cutoff' = 0 if missing(dwc_`cutoff')
	bysort id: egen days_with_cases_`cutoff' = max(dwc_`cutoff')
	drop dwc_`cutoff'
	label var days_with_cases_`cutoff' "Total days with cases reported from case `cutoff'"
	
	quietly levelsof id
	foreach country in `r(levels)' {
		di "***** Will process country id `country'"
		local conditions id == `country' & cumulative_cases >= `cutoff'
		quietly sum dateRep if `conditions'
		replace dc_`cutoff' = `r(max)' - `r(min)' + 1 if `conditions'
	}
	//replace dc_`cutoff' = 0 if missing(replace dc_`cutoff')
	bysort id: egen days_case_`cutoff' = max(dc_`cutoff')
	replace days_case_`cutoff' = 0 if missing(days_case_`cutoff')
	drop dc_`cutoff'
	label var days_case_`cutoff' "Total days from case `cutoff'"

	gen freq_`cutoff' = days_with_cases_`cutoff' / days_case_`cutoff'
	label var freq_`cutoff' "Prop. days w/ cases since case `cutoff'"
}
sum freq_*

duplicates drop id, force
keep code total_cases freq_*

twoway (histogram freq_1 , start(0) width(.01) color(red%30)) ///        
       (histogram freq_100, start(0) width(.01) color(green%30)), ///   
       legend(order(1 "From 1st case" 2 "From 100th case" ))
graph export from_0pct.png, replace

sum freq_*, detail

preserve
//If a country does not report on weekends, it will have a report (1 - 2/7)*100% of the time = 71.43%
drop if freq_100 < 0.7
drop if missing(freq_100)

twoway (histogram freq_1 , start(0.2) width(.01) color(red%30)) ///        
       (histogram freq_100, start(0.2) width(.01) color(green%30)), ///   
       legend(order(1 "From 1st case" 2 "From 100th case" ))
graph export from_70pct.png, replace
restore

save cases.dta, replace

use varlist.dta, clear
merge 1:1 code using cases.dta
drop if _merge != 3
drop _merge
save covid_institutions.dta, replace

rm cases.dta
rm varlist.dta

* ----------------------------------------------------------------------------*
* 							RESULTADOS PRINCIPAIS
* ----------------------------------------------------------------------------*
use covid_institutions.dta, clear


drop if missing(ln_densid, pop_65, ln_pibc, gdp_growth, health_gdp, temp, tourism, rule_law )

label var ln_densid "ln(Population density)"
label var pop_65 "Population over 65y (prop.)"
label var ln_pibc "Ln(GDP per capita)"
label var health_gdp "Health Expenditure (GDP prop.)"
label var tourism "International tourism $ (Exports prop.)"

replace gdp_growth = (gdp_growth - 1)

tabstat mad ln_densid pop_65 ln_pibc gdp_growth health_gdp temp tourism ///
		rule_law constraint_gov absence_corrup open_gov right order_sec ///
		regulatory_enforce civil_justice criminal_justice, ///
statistics(count mean cv min p25 p50 p75 max ) columns(statistics)

pwcorr rule_law constraint_gov absence_corrup open_gov right order_sec regulatory_enforce civil_justice criminal_justice

sum total_cases, detail

foreach ds_state in all freq_70pct {
	
	if "`ds_state'" == "freq_70pct" {
		preserve
		drop if freq_100 < 0.7
		local ds pc70_
	}
	
	foreach v in 0 1000 5000 10000 {

		display(" ")
		display(" ")
		display(" ")
		display(" ")
		display(" ")
		display(" ")

		display("========================================")
		display("========================================")
		display("	More than `v' cases, dataset = `ds_state'")
		display("========================================")
		display("========================================")


		foreach var of varlist rule_law constraint_gov absence_corrup open_gov right ///
							   order_sec regulatory_enforce civil_justice criminal_justice  {
			
			reg mad `var' ln_densid pop_65 ln_pibc gdp_growth health_gdp temp tourism if total_cases > `v', r
			estimates store `ds'ols_`var'

		
			local law_inst law_uk law_french law_german
			if `v' == 0 {
				local law_inst `law_inst' law_scand
			}
			
			ivreg2 mad ln_densid pop_65 ln_pibc gdp_growth health_gdp temp tourism ///
			(`var' = `law_inst') if total_cases > `v', r gmm2s sfirst savesfirst
			estimates store `ds'iv_`var'
			estadd scalar F_1st = e(first)[4,1]

			ivreg2 mad ln_densid pop_65 ln_pibc gdp_growth health_gdp temp tourism ///
			(`var' =  `law_inst' english_t) if total_cases > `v', r gmm2s sfirst savesfirst
			estimates store `ds'iv2_`var'
			estadd scalar F_1st = e(first)[4,1]

		}

		* Frequency Table
		//tab range_freq if total_cases > `v'
		
		quietly sum freq_1
		local min_f1 = round(`r(min)',0.1)-0.1
		quietly sum freq_100
		local min_f100 = round(`r(min)',0.1)-0.1
		local beg = min(`min_f1',`min_f100')
		
		twoway (histogram freq_1 if total_cases > `v', start(`beg') width(.01) color(red%30)) ///        
			   (histogram freq_100 if total_cases > `v', start(`beg') width(.01) color(green%30)), ///   
			   legend(order(1 "From 1st case" 2 "From 100th case" ))
		graph export `ds'totcases`v'.png, replace
		
		local outparams obslast se(%9.3f) b(%16.3f) star(* 0.1 ** 0.05 *** 0.01) nolines nogaps label compress noobs /*
		*/ r2(%9.3f) nobaselevels order(rule_law constraint_gov absence_corrup open_gov right order_sec /*
		*/ regulatory_enforce civil_justice criminal_justice *)

		local olsparams scalars("N Observations") sfmt(%9.0fc)

		local ivparams scalars("F_1st $F$ First-stage regression" "jp Hansej J p-value" "widstat Sanderson-Windmeijer F" "cdf Cragg-Donald Wald" "j Hansej J" "N Observations") sfmt(%9.2fc %9.3fc %9.2fc %9.2fc %9.2fc %9.0fc)

		tempname handle1
		rtfopen `handle1' using "`ds'results_mincases`v'.rtf", replace 
		//Setting up an A4 RTF in landscape and font size 10 (20 half points)
		file write `handle1' "\paperw16834\paperh11909\fs20" _n
		file write `handle1' "OLS regressions"
		rtfclose `handle1'

		esttab `ds'ols_*, `outparams' `olsparams'
		esttab `ds'ols_* using `ds'results_mincases`v'.rtf, `outparams' append `olsparams'
		esttab `ds'ols_* using `ds'results_ols_mincases`v'.csv, `outparams' replace `olsparams'
		esttab `ds'ols_* using `ds'results_ols_mincases`v'.tex, `outparams' replace `olsparams'

		//Page break
		rtfappend `handle1' using "`ds'results_mincases`v'.rtf", replace
		file write `handle1' "\page" _n
		file write `handle1' "IV1 regressions - legal origin"
		rtfclose `handle1'

		esttab `ds'iv_*, `outparams' `ivparams'
		esttab `ds'iv_* using `ds'results_mincases`v'.rtf, `outparams' append `ivparams'
		esttab `ds'iv_* using `ds'results_iv_mincases`v'.csv, `outparams' replace `ivparams'
		esttab `ds'iv_* using `ds'results_iv_mincases`v'.tex, `outparams' replace `ivparams'

		//Page break
		rtfappend `handle1' using "`ds'results_mincases`v'.rtf", replace
		file write `handle1' "\page" _n
		file write `handle1' "IV2 regressions - legal origin + % language-speaking"
		rtfclose `handle1'

		esttab `ds'iv2_*, `outparams' `ivparams'
		esttab `ds'iv2_* using `ds'results_mincases`v'.rtf, `outparams' append `ivparams'

		esttab `ds'iv2_* using `ds'results_iv2_mincases`v'.csv, `outparams' replace `ivparams'
		esttab `ds'iv2_* using `ds'results_iv2_mincases`v'.tex, `outparams' replace `ivparams'
	}

	if "`ds_state'" == "freq_70pct" {
		restore
	}
}

log close